1. Import Data and Library

library(Seurat)
## Attaching SeuratObject
library(SeuratData)
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used

## Warning in if (is.na(desc)) {: the condition has length > 1 and only the first
## element will be used
## ── Installed datasets ───────────────────────────────────── SeuratData v0.2.1 ──
## ✓ bmcite       0.3.0                    ✓ pbmcMultiome 0.1.0
## ────────────────────────────────────── Key ─────────────────────────────────────
## ✓ Dataset loaded successfully
## > Dataset built with a newer version of Seurat than installed
## ❓ Unknown version of Seurat installed
library(cowplot)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
load("seurat_data.RData")

# Import and Read Data
RNA_dat <- bm@assays$RNA
# RNA_mat <- t(as.matrix(RNA_dat@counts))

# Extract labels
cell_type <- bm$celltype.l2
cell_labels <- unique(cell_type)
# rand_ind <- c()
# 
# for (cell in cell_labels){
#   set.seed(10)
#   
#   subcell_ind <- which(cell_type == cell)
#   subcell_len <- length(subcell_ind)
#   subcell_mat <- RNA_mat[subcell_ind, ]
#   
#   row_ind <- apply(subcell_mat, 1, function(x){length(which(x != 0))}) 
#   idx <- order(row_ind, decreasing = T)
#   
#   rand_ind <- c(rand_ind, idx[1:(subcell_len/30)])
# }
# 
# sub_dat <- RNA_mat[rand_ind, ]
# 
# col_ind <- apply(sub_dat, 2, function(x){length(which(x != 0))})
# idx <- order(col_ind, decreasing = T)[1:500]
# 
# sub_dat <- sub_dat[, idx]
# 
# dat_hclust <- hclust(dist(t(sub_dat)))
# dat_index <- dat_hclust$order

# sub_dat <- sub_dat[, dat_index]
# sub_celltype <- cell_type[rand_ind]
sub_cluster_labels <- as.numeric(as.factor(sub_celltype))
cor_pearson_mat <- stats::cor(sub_dat[1:5, 1:5], method = "pearson")

cor_pearson_mat[upper.tri(cor_pearson_mat, diag = T)] <- NA
cor_pearson_mat[1:5,1:5]
##               IGKC     MALAT1        HBB  HBA2 HBA1
## IGKC            NA         NA         NA    NA   NA
## MALAT1 -0.39669581         NA         NA    NA   NA
## HBB    -0.09041483 -0.3775809         NA    NA   NA
## HBA2    0.32936831 -0.6591801 -0.2500000    NA   NA
## HBA1   -0.63936201  0.2332117  0.5833333 -0.25   NA

2-1. Dependency Measures

1. Pearson’s correlation coefficient

cor_pearson_mat <- stats::cor(sub_dat, method = "pearson")

cor_pearson_mat[upper.tri(cor_pearson_mat, diag = T)] <- NA
cor_pearson_mat[1:5,1:5]
##                IGKC       MALAT1       HBB      HBA2 HBA1
## IGKC             NA           NA        NA        NA   NA
## MALAT1  0.032614908           NA        NA        NA   NA
## HBB    -0.004736592  0.029307231        NA        NA   NA
## HBA2   -0.002841992 -0.018170839 0.9418259        NA   NA
## HBA1   -0.003328044 -0.003815931 0.9624631 0.9927025   NA
# plot the smallest correlations
cor_pearson_vec <- sort(abs(cor_pearson_mat), decreasing = T)
plot(cor_pearson_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_pearson_mat) == cor_pearson_vec[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_pearson_mat) == rev(cor_pearson_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_pearson_mat[idx1, idx2], 3)))
}

2. Spearman’s correlation coefficient

  • captures monotonous relationship within data.
  • the runtime is very short compared to other methods.
cor_spearman_mat <- stats::cor(sub_dat, method = "spearman")

cor_spearman_mat[upper.tri(cor_spearman_mat, diag = T)] <- NA
cor_spearman_mat[1:5,1:5]
##              IGKC     MALAT1       HBB      HBA2 HBA1
## IGKC           NA         NA        NA        NA   NA
## MALAT1 0.12899695         NA        NA        NA   NA
## HBB    0.09838046 0.15514668        NA        NA   NA
## HBA2   0.06958243 0.04661870 0.2025643        NA   NA
## HBA1   0.10954437 0.06171355 0.1585412 0.2075422   NA
# plot the smallest correlations
cor_spearman_vec <- sort(abs(cor_spearman_mat), decreasing = T)
plot(cor_spearman_vec)

#plot the high correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_spearman_mat) == cor_spearman_vec[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

#plot the lowest correlations
par(mfrow = c(2,2))
for(i in 1:4){
 idx <- which(abs(cor_spearman_mat) == rev(cor_spearman_vec)[i], arr.ind = T)
 idx1 <- idx[1]; idx2 <- idx[2]
 
 plot(sub_dat[,idx1], sub_dat[,idx2], col = sub_cluster_labels, asp = T,
      pch = 16, xlab = paste0(colnames(sub_dat)[idx1], ", (", idx1, ")"),
      ylab = paste0(colnames(sub_dat)[idx2], ", (", idx2, ")"), 
      main = paste0("Correlation of ", round(cor_spearman_mat[idx1, idx2], 3)))
}

2-2. Defining functions for next section

library(reshape2) # melt function
library(ggplot2) # ggplot function
library(pcaPP) # Fast Kendall function
library(energy) # Distance Correlation
library(Hmisc) # Hoeffding's D measure
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following object is masked from 'package:SeuratObject':
## 
##     Key
## The following object is masked from 'package:Seurat':
## 
##     Key
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(zebu) # Normalized Mutual Information
# library(minerva) # Maximum Information Coefficient
library(XICOR) # Chatterjee's Coefficient
# library(dHSIC) # Hilbert Schmidt Independence Criterion
library(VineCopula) # Blomqvist's Beta

make_cormat <- function(dat_mat){
matrix_dat <- matrix(nrow = ncol(dat_mat), ncol = ncol(dat_mat))
  cor_mat_list <- list()
  
  basic_cor <- c("pearson", "spearman")
  # find each of the correlation matrices with Pearson, Spearman, Kendall Correlation Coefficients
  for (i in 1:2){
    print(i)
    cor_mat <- stats::cor(dat_mat, method = basic_cor[i])
    cor_mat[upper.tri(cor_mat, diag = T)] <- NA
    cor_mat_list[[i]] <- cor_mat
  }
  
  # functions that take matrix or data.frame as input
  no_loop_function <- c(pcaPP::cor.fk, Hmisc::hoeffd, 
                        VineCopula::BetaMatrix)
  for (i in 3:5){
    print(i)
    fun <- no_loop_function[[i-2]]
    cor_mat <- fun(dat_mat)
    if (i == 4){ # Hoeffding's D
      cor_mat <- cor_mat$D
    }
    cor_mat[upper.tri(cor_mat, diag = T)] <- NA
    cor_mat_list[[i]] <- cor_mat
  }
  
  # functions that take two variables as input to calculate correlations.
  need_loop <- c(zebu::lassie, energy::dcor2d, XICOR::calculateXI)

  for (i in 6:8){
    print(i)
    fun <- need_loop[[i-5]]
    
    cor_mat <- matrix(nrow = ncol(dat_mat),
                      ncol = ncol(dat_mat))
    
    for (j in 2:ncol(dat_mat)){
      for (k in 1:(j-1)){
        if (i == 6){
          cor_mat[j, k] <- fun(cbind(dat_mat[, j], dat_mat[, k]), continuous=c(1,2), breaks = 6, measure = "npmi")$global

        } else {
          cor_mat[j, k] <- fun(as.numeric(dat_mat[, j]),
                               as.numeric(dat_mat[, k]))
        }
      }
    }
    
    cor_mat[upper.tri(cor_mat, diag = T)] <- NA
    cor_mat_list[[i]] <- cor_mat
  }
  return(cor_mat_list)
}

draw_heatmap <- function(cor_mat){
    len <- 8
    melted_cormat <- melt(cor_mat)
    melted_cormat <- melted_cormat[!is.na(melted_cormat$value),]
    break_vec <- round(as.numeric(quantile(melted_cormat$value,
                                           probs = seq(0, 1, length.out = len),
                                           na.rm = T)),
                       4)
    break_vec[1] <- break_vec[1]-1
    break_vec[len] <- break_vec[len]+1
    melted_cormat$value <- cut(melted_cormat$value, breaks = break_vec)
    heatmap_color <- unique(melted_cormat$value)
  
    heatmap <- ggplot(data = melted_cormat, aes(x = Var2, y = Var1, fill = value))+
      geom_tile(colour = "Black") +
      ggplot2::scale_fill_manual(breaks = sort(heatmap_color), 
                                 values = rev(scales::viridis_pal(begin = 0, end = 1)
                                              (length(heatmap_color)))) +
      theme_bw() + # make the background white
      theme(panel.border = element_blank(), panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(), axis.ticks = element_blank(),
            # erase tick marks and labels
            axis.text.x = element_blank(), axis.text.y = element_blank())
    
    return (heatmap)
}

make_cor_heatmap <- function(dat_mat, cor_mat_list){
  fun_lable <- c("Pearson's Correlation", "Spearman's Correlation", "Kendall's Correlation",
                 "Hoeffding's D", "Blomqvist's Beta", "NMI", 
                 "Distance Correlation", "XI Correlation")
  
  cor_heatmap_list <- list()
  cor_abs_heatmap_list <- list()
  
  # make correlation matrices
  #cor_mat_list <- make_cormat(dat_mat)
  
  for (i in 1:8){
    cor_mat <- cor_mat_list[[i]]
    
    # get heatmaps
    cor_heatmap <- draw_heatmap(cor_mat)
    
    # ggplot labels
    ggplot_labs <- labs(title = paste("Heatmap of", fun_lable[i]),
                      x = "",
                      y = "",
                      fill = "Coefficient") # change the title and legend label
      
    cor_heatmap_list[[i]] <- cor_heatmap + ggplot_labs
    
    if (i %in% c(1,2,3,4,6)){
      cor_abs_mat <- abs(cor_mat_list[[i]])
      cor_abs_heatmap <- draw_heatmap(cor_abs_mat)
      ggplot_abs_labs <- labs(title = paste("Abs Heatmap of", fun_lable[i]),
                              x = "", # change the title and legend label
                              y = "", 
                              fill = "Coefficient") 
      cor_abs_heatmap_list[[i]] <- cor_abs_heatmap + ggplot_abs_labs
    } else {
      ggplot_abs_labs <- labs(title = paste("Abs Heatmap of", fun_lable[i]),
                              subtitle = "Equivalent to Non-Abs Heatmap",
                              x = "", # change the title and legend label
                              y = "", 
                              fill = "Coefficient") 
      cor_abs_heatmap_list[[i]] <- cor_heatmap + ggplot_abs_labs
    }
  }
  
  ans <- list(cor_heatmap_list, cor_abs_heatmap_list)
  
  return (ans)
}
# cormat_list <- make_cormat(sub_dat)
# lst <- make_cor_heatmap(sub_dat, cormat_list)

load("Seurat_dependency.RData")

# lst[[1]]
lst[[1]][[4]]

cor_pearson_mat <- cormat_list[[1]]; cor_spearman_mat <- cormat_list[[2]];
cor_kendall_mat <- cormat_list[[3]]; cor_hoeffd_mat <- cormat_list[[4]];
cor_blomqvist_mat <- cormat_list[[5]]; cor_dist_mat <- cormat_list[[6]];
cor_MI_mat <- cormat_list[[7]]; cor_XI_mat <- cormat_list[[8]];

3. Find contrast characteristics among the correlation coefficients above

3-1-1. Low Pearson (< 0.40) and High Spearman (> 0.60) (linearity vs monotone)

cor_contrast1 <- (abs(cor_pearson_mat) < 0.4) & (abs(cor_spearman_mat) > 0.6)
cor_contrast_ind1 <- which(cor_contrast1, arr.ind = T)
nrow(cor_contrast_ind1)
## [1] 3

3-1-2. Visualization of Low Pearson (< 0.40) and High Spearman (> 0.60) (linearity vs monotone)

par(mfrow = c(2, 2))
for (i in 1:nrow(cor_contrast_ind1)){
  index1 <- cor_contrast_ind1[i, 1]; index2 <- cor_contrast_ind1[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3))))
}

3-2-1. High Pearson (> 0.89) and Low Spearman (< 0.15) (linearity vs monotone)

cor_contrast2 <- (abs(cor_pearson_mat) > 0.80) & (abs(cor_spearman_mat) < 0.15)
cor_contrast_ind2 <- which(cor_contrast2, arr.ind = T)
nrow(cor_contrast_ind2)
## [1] 12

3-2-2. Visualization of High Pearson (> 0.89) and Low Spearman (< 0.15) (linearity vs monotone)

par(mfrow = c(2, 6))
for (i in 1:nrow(cor_contrast_ind2)){
  index1 <- cor_contrast_ind2[i, 1]; index2 <- cor_contrast_ind2[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3))))
}

3-3-1. Low Pearson (< 0.50) and High Kendall (> 0.60) (linearity vs monotone)

cor_contrast3 <- (abs(cor_pearson_mat) < 0.5) & (abs(cor_kendall_mat) > 0.6)
cor_contrast_ind3 <- which(cor_contrast3, arr.ind = T)
nrow(cor_contrast_ind3)
## [1] 1

3-3-2. Visualization of Low Pearson (< 0.50) and High Kendall (> 0.60) (linearity vs monotone)

par(mfrow = c(1, 2))
for (i in 1:nrow(cor_contrast_ind3)){
  index1 <- cor_contrast_ind3[i, 1]; index2 <- cor_contrast_ind3[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Kendall of ", round(cor_kendall_mat[index1, index2], 3))))
}

3-4-1. High Pearson (> 0.85) and Low Kendall (< 0.15) (linearity vs monotone)

cor_contrast4 <- (abs(cor_pearson_mat) > 0.85) & (abs(cor_kendall_mat) < 0.15)
cor_contrast_ind4 <- which(cor_contrast4, arr.ind = T)
nrow(cor_contrast_ind4)
## [1] 7

3-4-2. Visualization of High Pearson (> 0.85) and Low Kendall (< 0.15) (linearity vs monotone)

par(mfrow = c(2, 4))
for (i in 1:nrow(cor_contrast_ind4)){
  index1 <- cor_contrast_ind4[i, 1]; index2 <- cor_contrast_ind4[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Kendall of ", round(cor_kendall_mat[index1, index2], 3))))
}

3-5-1. Low Pearson (< 0.50) and High Hoeffding’s D (> 0.50) (linearity vs monotone)

cor_contrast5 <- (abs(cor_pearson_mat) < 0.5) & (abs(cor_hoeffd_mat) > 0.5)
cor_contrast_ind5 <- which(cor_contrast5, arr.ind = T)
nrow(cor_contrast_ind5)
## [1] 0

3-6-1. High Pearson (> 0.90) and Low Hoeffding’s D (< 0.05) (linearity vs monotone)

cor_contrast6 <- (abs(cor_pearson_mat) > 0.9) & (abs(cor_hoeffd_mat) < 0.05)
cor_contrast_ind6 <- which(cor_contrast6, arr.ind = T)
nrow(cor_contrast_ind6)
## [1] 9

3-6-2. Visualization of High Pearson (> 0.90) and Low Hoeffding’s D (< 0.05) (linearity vs monotone)

par(mfrow = c(2, 5))
for (i in 1:nrow(cor_contrast_ind6)){
  index1 <- cor_contrast_ind6[i, 1]; index2 <- cor_contrast_ind6[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Hoeffding's D of ", round(cor_hoeffd_mat[index1, index2], 3))))
}

3-7-1. Low Pearson (< 0.50) and high Blomqvist’s beta (> 0.50) (linearity vs monotone)

cor_contrast7 <- (abs(cor_pearson_mat) < 0.05) & (abs(cor_blomqvist_mat) > 0.9)
cor_contrast_ind7 <- which(cor_contrast7, arr.ind = T)
nrow(cor_contrast_ind7)
## [1] 7

3-7-2. Visualization of Low Pearson (< 0.50) and high Blomqvist’s beta (> 0.50) (linearity vs monotone)

par(mfrow = c(2, 4))
for (i in 1:nrow(cor_contrast_ind7)){
  index1 <- cor_contrast_ind7[i, 1]; index2 <- cor_contrast_ind7[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Beta of ", round(cor_blomqvist_mat[index1, index2], 3))))
}

3-8-1. High Pearson (> 0.90) and Low Blomqvist’s beta (< 0.20) (linearity vs monotone)

cor_contrast8 <- (abs(cor_pearson_mat) > 0.9) & (abs(cor_blomqvist_mat) < 0.2)
cor_contrast_ind8 <- which(cor_contrast8, arr.ind = T)
nrow(cor_contrast_ind8)
## [1] 6

3-8-2. Visualization of High Pearson (> 0.90) and Low Blomqvist’s beta (< 0.20) (linearity vs monotone)

par(mfrow = c(2, 4))
for (i in 1:nrow(cor_contrast_ind8)){
  index1 <- cor_contrast_ind8[i, 1]; index2 <- cor_contrast_ind8[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("Beta of ", round(cor_blomqvist_mat[index1, index2], 3))))
}

3-9-1. Low Pearson (< 0.50) and High XI (> 0.50) (linearity vs non-linearity)

cor_contrast9 <- (abs(cor_pearson_mat) < 0.5) & (abs(cor_XI_mat) > 0.5)
cor_contrast_ind9 <- which(cor_contrast9, arr.ind = T)
nrow(cor_contrast_ind9)
## [1] 0

3-10-1. High Pearson (> 0.90) and Low XI (< 0.15) (linearity vs non-linearity)

cor_contrast10 <- (abs(cor_pearson_mat) > 0.9) & (abs(cor_XI_mat) < 0.15)
cor_contrast_ind10 <- which(cor_contrast10, arr.ind = T)
nrow(cor_contrast_ind10)
## [1] 8

3-10-2. Visualization of High Pearson (> 0.90) and Low XI (< 0.15) (linearity vs non-linearity)

par(mfrow = c(2, 4))
for (i in 1:nrow(cor_contrast_ind10)){
  index1 <- cor_contrast_ind10[i, 1]; index2 <- cor_contrast_ind10[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Pearson of ", round(cor_pearson_mat[index1, index2], 3)),
                    "\n",
                    paste0("XI of ", round(cor_XI_mat[index1, index2], 3))))
}

3-11-1. Low Spearman (< 0.05) and High Distance Correlation (> 0.95) (monotone vs non-linearity)

cor_contrast11 <- (abs(cor_spearman_mat) < 0.05) & (abs(cor_dist_mat) > 0.95)
cor_contrast_ind11 <- which(cor_contrast11, arr.ind = T)
nrow(cor_contrast_ind11)
## [1] 10

3-11-2. Visualization of Low Spearman (< 0.05) and High Distance Correlation (> 0.95) (monotone vs non-linearity)

par(mfrow = c(2, 5))
for (i in 1:nrow(cor_contrast_ind11)){
  index1 <- cor_contrast_ind11[i, 1]; index2 <- cor_contrast_ind11[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3)),
                    "\n",
                    paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3))))
}

3-12-1. High Spearman (> 0.60) and Low Distance Correlation (< 0.35) (monotone vs non-linearity)

cor_contrast12 <- (abs(cor_spearman_mat) > 0.6) & (abs(cor_dist_mat) < 0.35)
cor_contrast_ind12 <- which(cor_contrast12, arr.ind = T)
nrow(cor_contrast_ind12)
## [1] 6

3-12-2. Visualization of High Spearman (> 0.60) and Low Distance Correlation (< 0.35) (monotone vs non-linearity)

par(mfrow = c(2, 3))
for (i in 1:nrow(cor_contrast_ind12)){
  index1 <- cor_contrast_ind12[i, 1]; index2 <- cor_contrast_ind12[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Spearman of ", round(cor_spearman_mat[index1, index2], 3)),
                    "\n",
                    paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3))))
}

3-13-1. Low Kendall (< 0.50) and High Distance Correlation (> 0.50) (monotone vs non-linearity)

cor_contrast13 <- (abs(cor_kendall_mat) < 0.03) & (abs(cor_dist_mat) > 0.95)
cor_contrast_ind13 <- which(cor_contrast13, arr.ind = T)
nrow(cor_contrast_ind13)
## [1] 5

3-13-2. Visualization of Low Kendall (< 0.50) and High Distance Correlation (> 0.50) (monotone vs non-linearity)

par(mfrow = c(2, 3))
for (i in 1:nrow(cor_contrast_ind13)){
  index1 <- cor_contrast_ind13[i, 1]; index2 <- cor_contrast_ind13[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Kendall of ", round(cor_kendall_mat[index1, index2], 3)),
                    "\n",
                    paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3))))
}

3-14-1. High Kendall (> 0.50) and Low Distance Correlation (< 0.50) (monotone vs non-linearity)

cor_contrast14 <- (abs(cor_kendall_mat) > 0.6) & (abs(cor_dist_mat) < 0.4)
cor_contrast_ind14 <- which(cor_contrast14, arr.ind = T)
nrow(cor_contrast_ind14)
## [1] 0

3-15-1. Low Distance Correlation (< 0.50) and High Hoeffiding’s D (> 0.50) (non-linearity vs monotone)

cor_contrast15 <- (abs(cor_dist_mat) < 0.5) & (abs(cor_hoeffd_mat) > 0.5)
cor_contrast_ind15 <- which(cor_contrast15, arr.ind = T)
nrow(cor_contrast_ind15)
## [1] 0

3-16-1. High Distance Correlation (> 0.50) and Low Hoeffiding’s D (< 0.50) (non-linearity vs monotone)

cor_contrast16 <- (abs(cor_dist_mat) > 0.90) & (abs(cor_hoeffd_mat) < 0.10)
cor_contrast_ind16 <- which(cor_contrast16, arr.ind = T)
nrow(cor_contrast_ind16)
## [1] 1347

3-16-2. Visualization of Low Distance Correlation (< 0.50) and High Hoeffiding’s D (> 0.50) (non-linearity vs monotone)

par(mfrow = c(2, 5))
for (i in 1:10){
  index1 <- cor_contrast_ind16[i, 1]; index2 <- cor_contrast_ind16[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3)),
                    "\n",
                    paste0("Hoeffiding's D of ", round(cor_hoeffd_mat[index1, index2], 3))))
}

3-17-1. Low Distance Correlation (< 0.50) and High XI (> 0.50) (non-linearity vs non-linearity)

cor_contrast17 <- (abs(cor_dist_mat) < 0.5) & (abs(cor_XI_mat) > 0.5)
cor_contrast_ind17 <- which(cor_contrast17, arr.ind = T)
nrow(cor_contrast_ind17)
## [1] 0

3-18-1. High Distance Correlation (> 0.90) and Low XI (< 0.10) (non-linearity vs non-linearity)

cor_contrast18 <- (abs(cor_dist_mat) > 0.9) & (abs(cor_XI_mat) < 0.1)
cor_contrast_ind18 <- which(cor_contrast18, arr.ind = T)
nrow(cor_contrast_ind18)
## [1] 651

3-18-2. Visualization of High Distance Correlation (> 0.90) and Low XI (< 0.10) (non-linearity vs non-linearity)

par(mfrow = c(2, 5))
for (i in 1:10){
  index1 <- cor_contrast_ind18[i, 1]; index2 <- cor_contrast_ind18[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3)),
                    "\n",
                    paste0("XI of ", round(cor_XI_mat[index1, index2], 3))))
}

3-19-1. Low Distance Correlation (< 0.10) and High Blomqvist’s Beta (> 0.90) (non-linearity vs non-linearity)

cor_contrast19 <- (abs(cor_dist_mat) < 0.1) & (abs(cor_blomqvist_mat) > 0.9)
cor_contrast_ind19 <- which(cor_contrast19, arr.ind = T)
nrow(cor_contrast_ind19)
## [1] 21

3-19-2. Visualization of Low Distance Correlation (< 0.10) and High Blomqvist’s Beta (> 0.90) (non-linearity vs non-linearity)

par(mfrow = c(2, 5))
for (i in 1:10){
  index1 <- cor_contrast_ind19[i, 1]; index2 <- cor_contrast_ind19[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3)),
                    "\n",
                    paste0("Blomqvist's Beta of ", round(cor_blomqvist_mat[index1, index2], 3))))
}

3-20-1. High Distance Correlation (> 0.90) and Low Blomqvist’s Beta (< 0.10) (non-linearity vs non-linearity)

cor_contrast20 <- (abs(cor_dist_mat) > 0.9) & (abs(cor_blomqvist_mat) < 0.1)
cor_contrast_ind20 <- which(cor_contrast20, arr.ind = T)
nrow(cor_contrast_ind20)
## [1] 286

3-20-2. Visualization of High Distance Correlation (> 0.90) and Low Blomqvist’s Beta (< 0.10) (non-linearity vs non-linearity)

par(mfrow = c(2, 5))
for (i in 1:10){
  index1 <- cor_contrast_ind20[i, 1]; index2 <- cor_contrast_ind20[i, 2]
  plot(sub_dat[,index1], sub_dat[,index2], col = sub_cluster_labels, asp = T,
       pch = 16, xlab = paste0(colnames(sub_dat)[index1], ", (", index1, ")"),
       ylab = paste0(colnames(sub_dat)[index2], ", (", index2, ")"), 
       main = paste(paste0("Dist. Cor of ", round(cor_dist_mat[index1, index2], 3)),
                    "\n",
                    paste0("Blomqvist's Beta of ", round(cor_blomqvist_mat[index1, index2], 3))))
}